% Sutton and Barto's reinforcement learning textbook, 1998 edition.
% A book has no journal-style volume/issue, so those fields are not given.
@book{sutton1998reinforcement,
  author    = {Sutton, R. S. and Barto, A. G.},
  title     = {Reinforcement Learning: An Introduction},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1998}
}

% Edited collection: the entry carries editors rather than authors.
@book{wiering2012sota,
  editor    = {Wiering, Marco and van Otterlo, Martijn},
  title     = {Reinforcement Learning: State-of-the-Art},
  series    = {Adaptation, Learning, and Optimization},
  volume    = {12},
  publisher = {Springer},
  year      = {2012}
}

% Journal article. The issue number is given because this journal restarts
% its page numbering with every issue.
@article{tesauro1995temporal,
  author  = {Tesauro, G.},
  title   = {Temporal difference learning and {TD-Gammon}},
  journal = {Communications of the {ACM}},
  volume  = {38},
  number  = {3},
  pages   = {58--68},
  year    = {1995}
}

% Journal article on temporal-difference prediction methods.
@article{sutton1988learning,
  author  = {Sutton, R. S.},
  title   = {Learning to predict by the methods of temporal differences},
  journal = {Machine Learning},
  volume  = {3},
  pages   = {9--44},
  year    = {1988}
}

% Journal article. The title is brace-protected so that styles which
% lowercase titles keep the capital "Q".
@article{watkins1992q,
  author    = {Watkins, C. J. C. H. and Dayan, P.},
  title     = {{Q-learning}},
  journal   = {Machine Learning},
  volume    = {8},
  number    = {3--4},
  pages     = {279--292},
  publisher = {Springer},
  year      = {1992}
}

% Journal article. "Othello" is brace-protected so sentence-casing styles
% do not lowercase the game's name.
@article{vaneck2008application,
  author    = {van Eck, N. J. and van Wezel, M.},
  title     = {Application of reinforcement learning to the game of {Othello}},
  journal   = {Computers \& Operations Research},
  volume    = {35},
  number    = {6},
  pages     = {1999--2017},
  publisher = {Elsevier},
  year      = {2008}
}

% Paper recorded as accepted for publication; the venue is carried in the note.
% Every field except the last must end with a comma, otherwise the entry
% does not parse.
@misc{wiering2012neural,
  author = {van den Dries, S. and Wiering, M.},
  title  = {Neural-fitted {TD}-learning for playing {Othello} with structured
    neural networks},
  note   = {Accepted in IEEE Transactions on Neural Networks and Learning
    Systems, 2012.},
  year   = {2012}
}

% Paper in an annual conference proceedings volume, so it is filed as a
% proceedings paper with the series title as booktitle.
@inproceedings{thrun1995learning,
  author    = {Thrun, S.},
  title     = {Learning to play the game of chess},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {7},
  publisher = {MIT Press},
  year      = {1995}
}

% Departmental technical report. The citation key says 1995 while the report
% is dated 1994; the key is left unchanged so existing citations still resolve.
@techreport{rummery1995online,
  author      = {Rummery, G. A. and Niranjan, M.},
  title       = {On-line {Q}-learning using connectionist systems},
  institution = {University of Cambridge, Department of Engineering},
  number      = {CUED/F-INFENG/TR 166},
  year        = {1994}
}

% Conference paper. The proceedings volume number lives in its own field
% rather than being appended to the proceedings title.
@inproceedings{schaeffer2001temporal,
  author    = {Schaeffer, J. and Hlynka, M. and Jussila, V.},
  title     = {Temporal difference learning applied to a high-performance
    game-playing program},
  booktitle = {Proceedings of the 17th International Joint Conference on
    Artificial Intelligence},
  volume    = {1},
  pages     = {529--534},
  publisher = {Morgan Kaufmann Publishers Inc.},
  year      = {2001}
}

% Paper in an annual conference proceedings volume. "Go" is brace-protected
% so sentence-casing styles do not turn the game's name into the verb.
@inproceedings{schraudolph1994temporal,
  author    = {Schraudolph, N. N. and Dayan, P. and Sejnowski, T. J.},
  title     = {Temporal difference learning of position evaluation in the game
    of {Go}},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {6},
  pages     = {817--824},
  publisher = {Morgan Kaufmann Publishers},
  year      = {1994}
}

% Journal article. "Othello" is brace-protected against title lowercasing.
@article{moriarty1995discovering,
  author  = {Moriarty, D. E. and Miikkulainen, R.},
  title   = {Discovering complex {Othello} strategies through evolutionary
    neural networks},
  journal = {Connection Science},
  volume  = {7},
  number  = {3},
  pages   = {195--210},
  year    = {1995}
}

% University technical report. "96-10" is the report number, not a page range.
@techreport{leouski1996neural,
  author      = {Leouski, A. V. and Utgoff, P. E.},
  title       = {What a neural network can learn about {Othello}},
  institution = {University of Massachusetts},
  address     = {Amherst, MA},
  number      = {96-10},
  year        = {1996}
}

% Conference paper. The conference ordinal (16th) belongs in the proceedings
% title; it is not a volume number.
@inproceedings{papavassiliou1999convergence,
  author    = {Papavassiliou, V. A. and Russell, S.},
  title     = {Convergence of reinforcement learning with general function
    approximators},
  booktitle = {Proceedings of the 16th International Joint Conference on
    Artificial Intelligence},
  pages     = {748--757},
  publisher = {Morgan Kaufmann},
  year      = {1999}
}

% Conference paper. The proceedings title is written in natural word order
% rather than the inverted "..., 2006 IEEE Symposium on" index form.
@inproceedings{lucas2006temporal,
  author    = {Lucas, S. M. and Runarsson, T. P.},
  title     = {Temporal difference learning versus co-evolution for acquiring
    {Othello} position evaluation},
  booktitle = {2006 {IEEE} Symposium on Computational Intelligence and Games},
  pages     = {52--59},
  publisher = {IEEE},
  year      = {2006}
}

% Journal article. "Othello" is capitalised and brace-protected.
% NOTE(review): volume is kept as recorded; confirm against the journal issue.
@article{lucas2008learning,
  author  = {Lucas, S. M.},
  title   = {Learning to play {Othello} with n-tuple systems},
  journal = {Australian Journal of Intelligent Information Processing Systems},
  volume  = {4},
  pages   = {1--20},
  year    = {2008}
}

% Journal article on learning backgammon with temporal difference learning.
@article{wiering2010self,
  author    = {Wiering, M.},
  title     = {Self-play and using an expert to learn to play backgammon with
    temporal difference learning},
  journal   = {Journal of Intelligent Learning Systems and Applications},
  publisher = {Scientific Research Publishing},
  volume    = {2},
  number    = {2},
  pages     = {57--68},
  year      = {2010}
}

% Journal article. The title includes the quoted game name, written with
% LaTeX quote marks and brace-protected against lowercasing.
@article{yoshioka1999strategy,
  author    = {Yoshioka, T. and Ishii, S. and Ito, M.},
  title     = {Strategy acquisition for the game ``{Othello}'' based on
    reinforcement learning},
  journal   = {IEICE Transactions on Information and Systems},
  volume    = {E82-D},
  number    = {12},
  pages     = {1618--1626},
  publisher = {The Institute of Electronics, Information and Communication
    Engineers},
  year      = {1999}
}

% Journal article on combining features to evaluate game positions.
@article{buro1995statistical,
  author  = {Buro, M.},
  title   = {Statistical Feature Combination for the Evaluation of Game Positions},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {3},
  pages   = {373--382},
  year    = {1995}
}


